/* This program merges the core LBD data with the "containing county" version of the innovation 
   index to prepare data for regressions testing the localization of knowledge spillovers.

*/


* Open the run log, overwriting any previous copy. The file name is relative,
* so the log is written to the current working directory (the ${logs} global
* defined below is not used for it).
log using "mk_contcnty_regdata_mylog.log", replace

* Directory globals. File names are appended directly to these values, so each
* one must end with a path separator.
* NOTE(review): the two redacted values are not visible here -- confirm they
* end with "/" like the others.
* Only ${inter} and ${output} are referenced in the visible part of this file.
global path "[redacted]"
global programs "${path}programs/"
global logs "${path}logs/"
global rawlbd "/projects/data/lbd/"
global imported "[redacted]"
global inter "${path}data/inter/"
global output "${path}data/output/"
global tables "${path}results/tables/"
global graphs "${path}results/graphs/"



************************
*** Prep Innov Index ***
************************

* Turn the single innovation-index cross-section into a 1977-1997 panel:
* write one year-stamped copy of the cross-section per year ...
forvalues yr = 1977/1997 {
   use ${inter}indinnovindex_xsection_fipssic3_contonly_top260.dta, clear
   generate year = `yr'
   save ${inter}innovindex_cont_`yr'.dta, replace
}

* ... then stack them. The 1997 copy (innovindex_cont_1997.dta) is still in
* memory after the loop above, so only 1977-1996 need to be appended. Each
* per-year scratch file is removed as soon as it has been appended.
forvalues yr = 1977/1996 {
   append using ${inter}innovindex_cont_`yr'.dta
   rm ${inter}innovindex_cont_`yr'.dta
}
save ${inter}innovindex_cont_fipssic3_1977_1997.dta, replace

* The 1997 scratch file was never appended from disk; remove it separately.
rm ${inter}innovindex_cont_1997.dta

************************
*** END Innov Index ***
************************


*****************************************************************
*** LBD - import, organize, get ready to merge in innov index ***
*****************************************************************

* Load the LBD fips x sic3 x year panel and attach the innovation-index panel.
use ${inter}lbd_1977_1997_fips_sic3.dta, clear

merge 1:1 fips sic3 year using ${inter}innovindex_cont_fipssic3_1977_1997.dta
* Keep the merge indicator under its own name so later merges can reuse _merge.
rename _merge merge_innovindex

* Expand the es* wildcard BEFORE the loop. The local eslist was previously
* referenced here without having been defined, so it expanded to nothing and
* the es* variables were silently left missing for index-only rows.
unab eslist: es*

* Rows that exist only in the innovation index (merge_innovindex==2) have no
* LBD record; set their missing LBD measures to zero.
foreach var in emp pay entemp muentemp suentemp entrants `eslist' ppw ewppw  avgestsize entrate numestabs survincemp97 dyincemp97 survincemp dyincemp cyentrant cyexit {
   replace `var'=0 if `var'==. & merge_innovindex==2
}

*********************************
*** END LBD-Innov Index Merge ***
*********************************


*************************
*** BALANCE THE PANEL ***
*************************

* Balance the panel: every (fips, sic3) pair that appears in any year gets a
* row in every year 1977-1997, with zero activity where no row existed.

* Snapshot the unbalanced panel; it is reloaded below for the merge.
save ${inter}temp_prebalance_cont7.dta, replace

* Build the list of distinct (fips, sic3) pairs ONCE. The data in memory is
* exactly what was just saved, so there is no need to reload and re-contract
* the full panel inside the year loop.
contract fips sic3
drop _freq
tempfile pairlist
save "`pairlist'"

* Write one year-stamped copy of the pair list per year (same files as before).
forvalues yr = 1977/1997 {
   use "`pairlist'", clear
   gen year=`yr'
   save ${inter}fips_sic_`yr'_list_c7.dta, replace
}
* In memory: ${inter}fips_sic_1997_list_c7.dta, so append 1977-1996 only.
forvalues yr = 1977/1996 {
   append using ${inter}fips_sic_`yr'_list_c7.dta
}
sort fips sic3 year
save ${inter}fips_sic_year_list_c7.dta, replace

* Merge the full pair-by-year skeleton onto the unbalanced panel. Rows that
* come only from the skeleton (_merge==2) are the ones added by balancing.
use ${inter}temp_prebalance_cont7.dta, clear
merge 1:1 fips sic3 year using ${inter}fips_sic_year_list_c7.dta
unab eslist: es*
* Zero-fill the LBD measures on the rows added by balancing.
foreach var in emp pay entemp muentemp suentemp entrants `eslist' ppw ewppw  avgestsize entrate numestabs survincemp97 dyincemp97 survincemp dyincemp cyentrant cyexit  {
   replace `var'=0 if `var'==. & _merge==2
}
drop _merge


* Standardize the 1980 index variables. For each of pmfg/cmfg/puse/cuse:
*   <v>1980      the renamed <v>_freq1980 measure, missing values set to 0
*   dr<v>1980    1 if the measure was missing before the zero-fill, else 0
*   altz<v>1980  z-score computed by egen std() BEFORE the zero-fill
*   z<v>1980     z-score computed AFTER the zero-fill, from the sample mean
*                and standard deviation over all rows in memory
foreach yr in 1980 {
   foreach stub in pmfg cmfg puse cuse {
      local x `stub'`yr'
      rename `stub'_freq`yr' `x'
      * Show which years have missing values for this measure.
      tab year if `x'==.
      gen dr`x'=(`x'==.)
      egen altz`x' = std(`x'), mean(0) std(1)
      replace `x' = 0 if `x'==.
      egen mean`x' = mean(`x')
      egen sd`x' = sd(`x')
      gen z`x' = (`x' - mean`x')/sd`x'
   }
   * NOTE(review): this wildcard also removes any other variable whose name
   * starts with "mean" or "sd", not just the helpers created above.
   drop mean* sd*
}


* The pre-balance snapshot is no longer needed.
rm ${inter}temp_prebalance_cont7.dta


*************************
*** END BALANCING     ***
*************************

****************************************
*** MERGE County-Level External Data ***
****************************************

* Attach the 1980 county-level file by fips. Rows found only in the county
* file are discarded, and no _merge indicator is kept.
merge m:1 fips using ${inter}popdens_bashare_fips_1980.dta, keep(master match) nogenerate


****************************************
*** END County-Level External Data   ***
****************************************

*********************************
*** Calc Vars for Regressions ***
*********************************

* County total employment in 1977, copied onto every row of the county.
* The first egen fills only the 1977 rows; the max-by-county then spreads that
* value to the other years.
egen cntyemp1977 = total(emp) if year==1977, by(fips)

tempvar cnty77
egen `cnty77' = max(cntyemp1977), by(fips)
replace cntyemp1977 = `cnty77' if cntyemp1977==. & `cnty77'!=.
drop `cnty77'

* abd: 0 through 1980, 1 afterwards, missing when year is missing.
gen abd = (year>1980) if year!=.

* Drop each pattern separately: with both patterns in one drop command, a
* single non-matching pattern makes the whole command fail (hidden by
* capture) and nothing is dropped.
capture drop *nscmfg*
capture drop *nscuse*

* Interactions of abd with the z-scored and the raw 1980 measure.
foreach year in 1980 {
   foreach var in cuse {
      gen xabdz`var'`year' = abd*z`var'`year'
      gen xabd`var'`year' = abd*`var'`year'
   }
}

* Linear trend centred on 1980, and its interactions.
gen trend = year - 1980
gen xtrendabd = trend*abd
foreach year in 1980 {
   foreach var in cuse {
      gen xtrendz`var'`year' = trend*z`var'`year'
      gen xxtrendabdz`var'`year' = trend*z`var'`year'*abd
   }
}

* Rescale pay by 1/1000 before taking logs.
replace pay = pay/1000

* For every outcome, create three log-type transforms:
*   l<v>   ln(v)                 (missing when v <= 0)
*   l1<v>  ln(1 + v)
*   l2<v>  ln(v + (v^2 + 1)^0.5)
unab eslist: es*
local outcomes emp pay ppw entemp muentemp suentemp entrants cyentrant cyexit `eslist' avgestsize entrate numestabs
foreach v of local outcomes {
   generate l`v'=ln(`v')
   generate l1`v'=ln(1+`v')
   generate l2`v'=ln(`v'+(`v'^2+1)^0.5)
}

* Rebuild the numeric identifiers. The drop fails if numsic or numfips is not
* already in the data.
drop numsic numfips

* numfip is a group id over (fips, year) pairs, not a numeric copy of fips;
* the label now says so. The numeric copy of fips is numfips, created below.
* NOTE(review): confirm that grouping by fips AND year is what is intended.
egen numfip = group(fips year)
label var numfip "fips-year group id"

* Numeric sic3; values that cannot be converted become missing (force) and
* those rows are dropped.
destring sic3, gen(numsic3) force
drop if numsic3==.

* Numeric fips; without force this stops with an error on non-numeric values.
destring fips, gen(numfips)


*********************
*** END Calc Vars ***
*********************

* Shrink storage types where possible, then write the final regression
* dataset to the output directory, overwriting any existing copy.
compress
save ${output}lbd_innovindex_cont.dta, replace


* Close the log opened at the top of the file.
log close

